################################################################################
#' Script to use the Twitter API to rehydrate (get full data of) tweets for 
#' which (only) the status_id is known. This requires setup of a Twitter 
#' Developer account and an API key. See rtweet documentation. 
#' 
#' Thanks to Ray Serrato. 
#' 
#' Copyright (c) 2020-2021 Momin M. Malik
#' This project is licensed under the terms of the MIT license.
################################################################################
library(rtweet)

# Read the status IDs as character, never numeric. Tweet IDs are 64-bit
# integers that can exceed 2^53, so letting read.csv() guess the column type
# would parse them as doubles and silently corrupt the low-order digits,
# causing lookups of the wrong (or nonexistent) tweets.
status_ids <- read.csv("status_ids.csv", colClasses = "character")[, 1]

# Rehydration code. Requires Twitter API key set up in rtweet.
#
# The IDs are looked up in batches of at most `lim`, with a pause between
# batches so that consecutive batches start at least 901 seconds apart.
# NOTE(review): 90,000 is presumably the number of statuses rtweet can look up
# per rate-limit window, and 901 s a 15-minute window plus a one-second
# margin -- confirm against the rtweet / Twitter API documentation.
#
# Result: `ldf`, a list with one element per batch holding whatever
# lookup_tweets() returned for that batch.
lim <- 90000L
n_ids <- length(status_ids)
if (n_ids == 0L) {
  stop("No status IDs to look up.", call. = FALSE)
}

# Number of batches actually needed. Using ceiling() (rather than
# floor() + 1) avoids an empty trailing batch when n_ids is an exact multiple
# of lim, and gives exactly one batch when n_ids < lim.
n_batches <- as.integer(ceiling(n_ids / lim))

# Batch index for every ID: the first `lim` IDs get 1, the next `lim` get 2,
# and so on. Integer division cannot produce a descending sequence the way
# 1:0 does.
x <- (seq_len(n_ids) - 1L) %/% lim + 1L
stopifnot(length(x) == n_ids, max(x) == n_batches)

ldf <- vector("list", n_batches)
ptm <- proc.time()
for (i in seq_len(n_batches)) {
  if (i > 1L) {
    # Wait out the remainder of the window measured from the start of the
    # previous batch's lookup; always sleep at least one second.
    print("Sleeping")
    Sys.sleep(max(901 - (proc.time() - ptm)[["elapsed"]], 1))
    print("Finished sleeping")
  }
  ptm <- proc.time()
  ldf[[i]] <- lookup_tweets(status_ids[x == i])
  print(proc.time() - ptm)  # time taken by this batch's lookup
  print(i)                  # progress: index of the batch just completed
}

# Drop the temporaries so that only the inputs and `ldf` remain in the
# workspace.
rm(lim, n_ids, n_batches, x, i, ptm)

# Check rows. A few tweets may be missing; these can be checked via their
# URL. I have only found these to be deleted or otherwise unavailable.
#
# Total number of rows returned across all batches. NROW() is used instead of
# stacking dim() vectors so the check also works when there is only a single
# batch (a lone dim() vector is not a matrix, which colSums() rejects) and
# when a batch element is NULL (counted as zero rows).
sum(vapply(ldf, NROW, integer(1L)))

# Stack the per-batch results into a single data frame using rtweet's helper,
# then discard the list.
df <- do_call_rbind(ldf)
rm(ldf)

# Save every object remaining in the workspace to disk.
save.image("rehydrated_tweets.RData")